1 Isotope data

1.1 Raw data

From the directory:

O:\FWWR1708\Working\Task 4 - Field\Isotope work\writing\Paper 2. Bruce lead\data\Bruce precip analysis

The data file for measured data is “precip.iso.data_BD.csv”.

For the measured data file, some characterisations and summaries follow.

ddata <- readr::read_csv("data/precip.iso.data_BD.csv")
ddata.orig <- ddata

slice_head(ddata.orig, n = 5)
site.number sample.id year month variable val agent.number
1 70801 2007 8 d18O -4.41 20661
1 70901 2007 9 d18O -5.71 20661
1 71001 2007 10 d18O -2.74 20661
1 71101 2007 11 d18O -4.72 20661
1 71201 2007 12 d18O -2.54 20661

The variable column contains six “variables”: d18O, dD, d.excess, lat, long.

ddata |> count(variable)
variable n
d.excess 1466
d18O 1466
dD 1466
lat 4398
long 4398

The day of the month for the measurement isn’t given, so the measurement is located in the middle of the month.

ddata <- filter(ddata.orig, variable == "d18O") |>
  rename(d18O = val, Site = site.number) |>
  mutate(Date = lubridate::make_date(year = year, month = month, day = 15))

d2H.

ddata2 <- filter(ddata.orig, variable == "dD") |>
  rename(dD = val, Site = site.number) |>
  mutate(Date = lubridate::make_date(year = year, month = month, day = 15))
skimr::skim(ddata)
Data summary
Name ddata
Number of rows 1466
Number of columns 8
_______________________
Column type frequency:
character 2
Date 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
sample.id 0 1 5 5 0 1465 0
variable 0 1 4 4 0 1 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
Date 0 1 2007-08-15 2010-06-15 2008-11-15 35

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Site 0 1.00 29.34 16.26 1.00 16.00 29.00 43.00 58.00 ▆▇▇▆▇
year 0 1.00 2008.33 0.82 2007.00 2008.00 2008.00 2009.00 2010.00 ▃▇▁▇▁
month 0 1.00 6.82 3.52 1.00 4.00 7.00 10.00 12.00 ▆▅▃▆▇
d18O 8 0.99 -6.16 2.58 -16.33 -7.78 -5.86 -4.28 -0.11 ▁▁▅▇▂
agent.number 0 1.00 22982.15 6548.22 7524.00 19840.00 21811.00 28760.00 31173.00 ▁▂▅▁▇

dD

skimr::skim(ddata2)
Data summary
Name ddata2
Number of rows 1466
Number of columns 8
_______________________
Column type frequency:
character 2
Date 1
numeric 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
sample.id 0 1 5 5 0 1465 0
variable 0 1 2 2 0 1 0

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
Date 0 1 2007-08-15 2010-06-15 2008-11-15 35

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Site 0 1 29.34 16.26 1.00 16.00 29.00 43.00 58.00 ▆▇▇▆▇
year 0 1 2008.33 0.82 2007.00 2008.00 2008.00 2009.00 2010.00 ▃▇▁▇▁
month 0 1 6.82 3.52 1.00 4.00 7.00 10.00 12.00 ▆▅▃▆▇
dD 4 1 -38.07 20.86 -123.39 -50.78 -34.74 -22.84 5.54 ▁▁▃▇▃
agent.number 0 1 22982.15 6548.22 7524.00 19840.00 21811.00 28760.00 31173.00 ▁▂▅▁▇

1.2 Locations of sites

Site information (e.g. lat, long, elevation, district) is in the file “lookup_rainsite_VCSN.csv”. Or at least the closest VCSN.

site.info.orig <- readr::read_csv("data/lookup_rainsite_VCSN.csv")

site.info <- site.info.orig |>
  dplyr::select(Site = site_numbe, area, lat, lon = long, height = ELEVATION)
slice_head(site.info, n = 5)
Site area lat lon height
1 Far North District -35.125 173.275 34.9724
2 Kaipara District -35.975 173.925 31.4201
3 Whangarei District -35.575 174.475 71.3756
4 Far North District -35.425 173.825 196.6477
5 Far North District -35.325 174.125 33.8324

Site location is shown by North Island and South Island.

iso_plot_site_locations(site.info, "Site", "lat", "lon", "NI")

iso_plot_site_locations(site.info, "Site", "lat", "lon", "SI")

1.3 Plots of raw data

iso_plot_raw(ddata, "Site", "Date", "d18O",
  "Raw d18O by site (1-20)",
  sites = paste(1:20)
)

iso_plot_raw(ddata, "Site", "Date", "d18O",
  "Raw d18O by site (21-41)",
  sites = paste(21:41)
)

iso_plot_raw(ddata, "Site", "Date", "d18O",
  "Raw d18O by site (42-58)",
  sites = paste(42:58)
)

iso_plot_raw(ddata2, "Site", "Date", "dD",
  "Raw dD by site (1-20)",
  sites = paste(1:20)
)

iso_plot_raw(ddata2, "Site", "Date", "dD",
  "Raw dD by site (21-41)",
  sites = paste(21:41)
)

iso_plot_raw(ddata2, "Site", "Date", "dD",
  "Raw dD by site (42-58)",
  sites = paste(42:58)
)

1.4 Save final isotope data set

Sites with few data points are dropped, and NA measured values.

ddata <- ddata |>
  filter(!(Site %in% c(8, 9, 12, 40, 45))) |>
  drop_na(d18O)

Same for dD.

ddata2 <- ddata2 |>
  filter(!(Site %in% c(8, 9, 12, 40, 45))) |>
  drop_na(dD)
save(ddata,  file = "ProcessedData/ddata.RData")
slice_head(ddata, n = 5)
Site sample.id year month variable d18O agent.number Date
1 70801 2007 8 d18O -4.41 20661 2007-08-15
1 70901 2007 9 d18O -5.71 20661 2007-09-15
1 71001 2007 10 d18O -2.74 20661 2007-10-15
1 71101 2007 11 d18O -4.72 20661 2007-11-15
1 71201 2007 12 d18O -2.54 20661 2007-12-15
save(ddata2, file = "ProcessedData/ddata2.RData")
slice_head(ddata2, n = 5)
Site sample.id year month variable dD agent.number Date
1 70801 2007 8 dD -24.44 20661 2007-08-15
1 70901 2007 9 dD -33.89 20661 2007-09-15
1 71001 2007 10 dD -11.46 20661 2007-10-15
1 71101 2007 11 dD -28.23 20661 2007-11-15
1 71201 2007 12 dD -16.98 20661 2007-12-15

2 The VCSN climate and environmental data

2.1 The virtual climate station network (VCSN) data

An overview of the virtual climate station data is available here:

https://niwa.co.nz/climate/our-services/virtual-climate-stations

“Virtual Climate station Network (VCSN) data are estimates of daily rainfall, potential evapotranspiration, air and vapour pressure, maximum and minimum air temperature, soil temperature, relative humidity, solar radiation, wind speed and soil moisture on a regular (~5km) grid covering the whole of New Zealand. The estimates are produced every day, based on the spatial interpolation of actual data observations made at climate stations located around the country.”

One form in which the VCSN data are available is a series of yearly zipped up files in the directory.

“Q:/CLIMATE/vcsn_data/”

The CRS seems to be NZGD 1949

https://one.niwa.co.nz/display/CLIDB/VCSN+Grid+Points

As described here:

https://www.linz.govt.nz/data/geodetic-system/datums-projections-and-heights/geodetic-datums/new-zealand-geodetic-datum-1949-nzgd1949

With EPSG number of 27258

https://epsg.io/27258

2.2 Extracting daily VCSN daily data

Some function were made to explore and extract the VCSN data:

  1. vcsn_info()
  2. vcsn_agent_locations()
  3. vcsn_append_nearest_vcsn()
  4. vcsn_append_climate_information()

These functions are used below.

Set up the directory with the VCSN climate data. Sometimes before extracting data, the Q drive will need to be “woken up” by clicking on it in Windows Explorer.

# VCSN.directory <- "Q:/CLIMATE/vcsn_data/"
# vcsn_info(VCSN.directory)

Find all the vcsn climate station/agent locations. The output is an sf class object, and can be plotted.

# vcsn.agent.locations  <- vcsn_agent_locations(VCSN.directory) 
# save(vcsn.agent.locations, file = "ProcessedData/vcsn.agent.locations.RDdata")
load(file = "ProcessedData/vcsn.agent.locations.RDdata")
slice_head(vcsn.agent.locations, n = 5)
Agent vcsn.lon vcsn.lat geometry
3027 166.825 -45.325 POINT (166.825 -45.325)
3106 166.825 -45.375 POINT (166.825 -45.375)
3380 166.825 -45.425 POINT (166.825 -45.425)
4782 166.825 -45.475 POINT (166.825 -45.475)
5878 166.825 -45.525 POINT (166.825 -45.525)
plot(vcsn.agent.locations["Agent"], cex = 0.5, pch = 16)

Append nearest agent information and daily climate data for 2007–2022. This takes a while, so is done once and the results saved, then loaded.

# years <- 2023
# site.info.plus.climate.data <- vcsn_append_climate_information(site.info,VCSN.directory,

#                                                       years)

# save(site.info.plus.climate.data, file = "output/site.info.plus.climate.data.RData")

load(file = "ProcessedData/site.info.plus.climate.data.RData")
slice_head(site.info.plus.climate.data, n = 5)
Site area lat lon height VCSN.Agent VCSN.lat VCSN.lon Date MSLP PET Rain RH SoilM ETmp Rad TMax Tmin VP Wind Rain_bc Tmax_N Tmin_N
1 Far North District -35.125 173.275 34.9724 20661 -35.125 173.275 2007-01-01 1015.4 4.7 0 76.2 -113.3 16.4 28.3 20.1 8.4 12.6 3.9 0 19.9 8.2
2 Kaipara District -35.975 173.925 31.4201 21431 -35.975 173.925 2007-01-01 1014.5 4.3 0 73.4 -117.4 17.2 23.9 19.8 9.4 12.7 3.7 0 19.7 9.5
3 Whangarei District -35.575 174.475 71.3756 21815 -35.575 174.475 2007-01-01 1014.6 4.2 0 71.0 -118.3 17.4 26.9 21.2 10.4 12.6 3.6 0 21.5 10.4
4 Far North District -35.425 173.825 196.6477 29644 -35.425 173.825 2007-01-01 1015.0 4.1 0 74.3 -91.2 16.5 20.6 19.9 7.8 12.1 3.1 0 19.8 7.8
5 Far North District -35.325 174.125 33.8324 21536 -35.325 174.125 2007-01-01 1015.0 4.1 0 72.2 -112.2 17.8 26.2 22.0 9.7 12.4 2.8 0 22.4 9.6

The climate fields are identified as

https://one.niwa.co.nz/display/CLIDB/VCSN+Times+and+Field+Descriptions

https://one.niwa.co.nz/display/SYSTEMS/The+size+of+the+problem

  1. MSLP (Mean Sea Level Pressure)
  2. PET (Potential Evapotranspiration)
  3. Rain (daily rainfall)
  4. RH (relative humidity)
  5. SoilM (soil moisture)
  6. ETmp (earth temperature at 10cm depth)
  7. Rad (solar radiation)
  8. TMax (maximum temperature)
  9. Tmin (minimum temperature)
  10. VP (vapour pressure)
  11. Wind (average wind speed at 10m above ground level)
  12. Rain_bc (Rainfall with bias correction)
  13. Tmax_N (maximum temperature via different method)
  14. Tmin_N (minimum temperature via different method)

Potential evaporation or potential evapotranspiration is defined as the amount of evaporation that would occur if a sufficient water source were available.

Rain_bc, Tmax_n, Tmin_N were introduced in 2019.

2.3 Mean value from 2007-2022

Convert to long format and make a summary via the mean value 2007–2022 by site.

site.info.plus.climate.data.long <- site.info.plus.climate.data |> 
  relocate(height, .after = "Date") |> 
  pivot_longer(height:Tmin_N, names_to = "Site quantity", values_to = "Value") |> 
  dplyr::select(-c("area", "lat", "lon"))

slice_head(site.info.plus.climate.data.long, n = 5)
Site VCSN.Agent VCSN.lat VCSN.lon Date Site quantity Value
1 20661 -35.125 173.275 2007-01-01 height 34.9724
1 20661 -35.125 173.275 2007-01-01 MSLP 1015.4000
1 20661 -35.125 173.275 2007-01-01 PET 4.7000
1 20661 -35.125 173.275 2007-01-01 Rain 0.0000
1 20661 -35.125 173.275 2007-01-01 RH 76.2000
# mean values over 2007-2022 (inclusive)
site.climate.summary <- site.info.plus.climate.data.long |> 
  group_by(Site, VCSN.lat, VCSN.lon, `Site quantity`) |> 
  summarise(mean = mean(Value, na.rm = TRUE)) |> 
  pivot_wider(names_from = `Site quantity`, values_from = mean) |> 
  ungroup()

slice_head(site.climate.summary, n = 5)
Site VCSN.lat VCSN.lon ETmp MSLP PET RH Rad Rain Rain_bc SoilM TMax Tmax_N Tmin Tmin_N VP Wind height
1 -35.125 173.275 16.02943 1017.120 2.936140 84.48234 15.37604 3.968789 3.975873 -46.10077 20.32240 20.36490 12.33893 12.33280 14.89418 4.105151 34.9724
2 -35.975 173.925 16.16725 1016.914 2.860849 82.87960 14.88270 3.070722 2.976318 -55.18568 19.90358 19.82599 11.80717 11.81980 14.66994 3.465092 31.4201
3 -35.575 174.475 15.78417 1017.205 2.786037 82.68727 14.51177 3.812200 4.133676 -46.91085 20.11838 20.06369 12.59993 12.46001 14.84321 3.164493 71.3756
4 -35.425 173.825 15.24098 1016.628 2.695568 85.71193 14.53467 4.424469 4.088518 -34.59187 19.25228 19.27521 11.21182 11.31184 14.58049 2.951643 196.6477
5 -35.325 174.125 16.13272 1017.069 2.780869 83.63652 14.42936 3.949726 3.543600 -41.56413 20.64189 20.59630 12.34612 12.00613 14.87118 2.330493 33.8324
rm(site.info.plus.climate.data.long)

Calculate mean annual range of monthly temperatures:

Take monthly average daily maximum temperature for each site and month of the time series

Pick the highest month for each year

Pick the lowest for each year

Subtract lowest from highest for each year to give annual range

Take the average range across all years == mean annual range of monthly temperatures

#  Convert to date if not already
site.info.plus.climate.data$Date <- as.Date(site.info.plus.climate.data$Date)
#  Get months
site.info.plus.climate.data$Month <- months(site.info.plus.climate.data$Date)
#  Get years
site.info.plus.climate.data$Year <- format(site.info.plus.climate.data$Date,format="%y")

#Get mean annual range of maximum daily temperatures (averaged for each month) each VCSN.Agent, month and year combination

aggy <- aggregate(TMax ~ VCSN.Agent + Site + Month + Year, data = site.info.plus.climate.data, mean)

aggyMAX <- aggregate(TMax ~ VCSN.Agent + Site + Year, data = aggy, max)
aggyMIN <- aggregate(TMax ~ VCSN.Agent + Site + Year, data = aggy, min)
aggyRANGE <- aggyMIN
aggyRANGE$AnnualTempRange <- aggyMAX$TMax - aggyMIN$TMax
annualranges <- aggregate(AnnualTempRange ~ VCSN.Agent + Site, data = aggyRANGE, mean)

site.climate.summary<-dplyr::left_join(site.climate.summary, annualranges, by = "Site")

rm(site.info.plus.climate.data)
rm(aggy)
rm(aggyMAX)
rm(aggyMIN)
rm(aggyRANGE)

Now make the same VCSN summary file for 2007-2022, but across all VCSN points so that we can use it to extrapolate regression relationships across the country

slice_head(vcsn.agent.locations, n = 5) 
Agent vcsn.lon vcsn.lat geometry
3027 166.825 -45.325 POINT (166.825 -45.325)
3106 166.825 -45.375 POINT (166.825 -45.375)
3380 166.825 -45.425 POINT (166.825 -45.425)
4782 166.825 -45.475 POINT (166.825 -45.475)
5878 166.825 -45.525 POINT (166.825 -45.525)
dim(vcsn.agent.locations)
[1] 11491     4
#get list of all VCSN agent numbers and elevations

elevations<-read.csv("data/VCS_elevations.csv")
keeps<-subset(elevations, select = c(AGENT_NO, ELEVATION))
names(keeps)<-c("Agent", "elevation")
elevations<-keeps

# # test run
# #  vcsn.2007.2022 <- vcsn_combine_daily_data(VCSN.directory, years = 2007)
# # test run went fine but the line below took over a day and needs to be run on the modelling computer
# # vcsn.2007.2022 <- vcsn_combine_daily_data(VCSN.directory, years = 2007:2022)
# # save(vcsn.2007.2022, file = "output/vcsn.2007.2022.RData")

# load("output/vcsn.2007.2022.RData")

# # calculate annual ranges of monthly average temperatures

# #  Convert to date if not already
# vcsn.2007.2022$Date <- as.Date(vcsn.2007.2022$Date)
# #  Get months
# vcsn.2007.2022$Month <- months(vcsn.2007.2022$Date)
# #  Get years
# vcsn.2007.2022$Year <- format(vcsn.2007.2022$Date,format="%y")

#  # Get mean annual range of maximum daily temperatures (averaged for each month) for each VCSN.Agent, month and year combination

# aggy1 <- aggregate(TMax ~ Agent + Month + Year, data = vcsn.2007.2022, mean)
#
# aggyMAX <- aggregate(TMax ~ Agent + Year, data = aggy1, max)
# aggyMIN <- aggregate(TMax ~ Agent + Year, data = aggy1, min)
#
# aggyRANGE<-aggyMIN
# aggyRANGE$AnnualTempRange<-aggyMAX$TMax - aggyMIN$TMax
#
# annualranges<-aggregate(AnnualTempRange ~ Agent, data = aggyRANGE, mean)


# vcsn.mean <- vcsn.2007.2022 |>
#   dplyr::select(-Tmin, -TMax,
#                 -Rain)  |>
#   rename(lat = Lat, lon = Longt) |>
#   group_by(Agent, lat, lon) |>
#   summarise(across(.cols = MSLP:Rain_bc, .fns = ~mean(.x, na.rm = TRUE)))
#
# national.climate.summary<-dplyr::left_join(vcsn.mean, annualranges, by = "Agent")
# #add elevations
# national.climate.summary<-dplyr::left_join(national.climate.summary, elevations, by = "Agent")
#
# names(vcsn.mean)
# vcsn.mean<-national.climate.summary[,1:14]
# names(vcsn.mean)<-c("Agent", "lat", "lon", "MSLP", "PET" , "RH"      ,        "SoilM"    ,       "ETmp"
#  ,"Rad"     ,        "VP"      ,        "Wind"      ,      "Rain_bc"   ,      "AnnualTempRange", "height")


# removing large R objects
# rm(vcsn.2007.2022)
# rm(aggy1)
# rm(aggyMAX)
# rm(aggyMIN)
# rm(aggyRANGE)

# save(national.climate.summary, file = "ProcessedData/national.climate.summary.RData")
# save(vcsn.mean, file = "ProcessedData/vcsn.mean.RData")

load("ProcessedData/national.climate.summary.RData")
load("ProcessedData/vcsn.mean.RData")

Introduce an “island” column with values South Island/North Island.

site.climate.summary <- site.climate.summary |> 
  mutate(island = if_else(VCSN.lon <= 174.49 & VCSN.lat <= -40.18, 
                          "South Island",
                          "North Island")) |> 
  relocate(island, .after = Site) 

Change some names and add the same “island” column for the national VCSN dataframe for mapping later

names(national.climate.summary)<-c("VCSN.Agent", "VCSN.lat", "VCSN.lon", "MSLP", "PET", "RH", "SoilM", "ETmp", 
 "Rad", "VP", "Wind", "Rain_bc", "AnnualTempRange", "height")

national.climate.summary <- national.climate.summary |> 
  mutate(island = if_else(VCSN.lon <= 174.49 & VCSN.lat <= -40.18, 
                          "South Island",
                          "North Island")) |> 
  relocate(island, .after = height) 

Make climate summary by site data frame into an sf object. Make climate summary by site data frame into an sf object.

site.climate.summary.sf <- site.climate.summary |> 
  sf::st_as_sf(coords = c("VCSN.lon", "VCSN.lat"), crs = 4326)

Make national climate summary into an sf object…and take a look at the annual temperature range nationally

vcsn.agent.locations1 <- vcsn.agent.locations
names(vcsn.agent.locations1) <- c("VCSN.Agent", "vcsn.lon", "vcsn.lat", "geometry")
national.climate.summary.sf <- dplyr::full_join(national.climate.summary, 
                                                vcsn.agent.locations1,
                                                by = "VCSN.Agent")
national.climate.summary.sf <- st_as_sf(national.climate.summary.sf)

plot(national.climate.summary.sf["AnnualTempRange"], cex = 0.5, pch = 16)

2.4 Plots of mean rainfall and soil moisture at sites

Plot showing mean rainfall and soil moisture at sites. Both have potential to be proxies for orographic effects due to mountain ranges.

Rain_bc the bias corrected version of Rain is used.

plot(site.climate.summary.sf["Rain_bc"], cex = 0.5, pch = 16)

nzmap <- rnaturalearth::ne_countries(scale = 'large', 
                      country = "New Zealand",
                      returnclass = 'sf')
ggplot() + 
    geom_sf(data = nzmap, fill = "tan1") + 
    geom_sf(data = site.climate.summary.sf,
            mapping = aes(size = Rain_bc, colour = Rain)) +
    coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
    scale_colour_viridis_c() +
    xlab("Longitude") +
    ylab("Latitude") +
    ggtitle("Mean daily rainfall (2007-2022)")

ggplot() + 
    geom_sf(data = nzmap, fill = "tan1") + 
    geom_sf(data = site.climate.summary.sf,
            mapping = aes(size = SoilM, colour = SoilM)) +
    coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
    scale_colour_viridis_c() +
    xlab("Longitude") +
    ylab("Latitude") +
    ggtitle("Mean soil moisture (2007-2022)")

ggplot() + 
    geom_sf(data = nzmap, fill = "tan1") + 
    geom_sf(data = site.climate.summary.sf,
            mapping = aes(size = -SoilM, colour = -SoilM)) +
    coord_sf(xlim = c(164.8, 179.4), ylim = c(-47.7, -33.8)) +
    scale_colour_viridis_c() +
    xlab("Longitude") +
    ylab("Latitude") +
    ggtitle("Negative mean soil moisture (2007-2022)")

The soil moisture plateaus as rainfall increases.

ggplot(site.climate.summary.sf, aes(x = Rain_bc, y = SoilM)) +
  geom_point(colour = "blue") +
  xlab("Mean daily rainfall") +
  ylab("Mean soil moisture") +
  ggtitle("Sites: soil moisture vs rainfall (mean 2007-09)")

2.5 Correlation between VCSN predictor variables

There is high correlation between many VCSN variables, and for the purposes of latter regression/correlation analyses with estimated sinusoidal parameters, some VCSN variables can be dropped.

Concentrating on “temperature” variables, Tmin and TMax are highly correlated with the variables Tmin_N, and Tmax_n, which in turn are highly correlated with ETmp (earth temperature at 10cm depth).

So ETmp is retained but the other four are dropped (Tmin, TMax, Tmin_N, Tmax_N).

We will also retain the AnnualTempRange metric

site.T <- site.climate.summary |> 
  dplyr::select(ETmp, Rad, TMax, Tmin, Tmax_N, Tmin_N)

GGally::ggpairs(site.T)

corrplot::corrplot(cor(site.T), diag = FALSE, order = "hclust" )

Concentrating on “non-temperature” variables. Rain is dropped and the bias-corrected version Rain_bc is retained.

site.non.T <- site.climate.summary |> 
  dplyr::select(MSLP, PET, Rain, SoilM, VP, Wind, Rain_bc, height, AnnualTempRange)

GGally::ggpairs(site.non.T)

corrplot::corrplot(cor(site.non.T), diag = FALSE, order = "hclust" )

site.remain <- site.climate.summary |> 
  dplyr::select(-Site, -island, 
                -TMax, -Tmin, -Tmax_N, -Tmin_N,
                -Rain)

GGally::ggpairs(site.remain)

corrplot::corrplot(cor(site.remain), diag = FALSE, order = "hclust" )

3 Saved data

These data are need for the next block of code (01-isoscape-model-fits-regressions.Rmd).

3.1 d18O data

save(ddata, file = "Output/Data/ddata.RData")
dim(ddata)
[1] 1436    8
names(ddata)
[1] "Site"         "sample.id"    "year"         "month"        "variable"    
[6] "d18O"         "agent.number" "Date"        

3.2 Site climate mean value data

Mean value of climate variable at a site, for the closest agent. Taking the mean value over all data for 2007 to 2022.

save(site.climate.summary, file = "Output/Data/site.climate.summary.RData")
dim(site.climate.summary)
[1] 55 21
names(site.climate.summary)
 [1] "Site"            "island"          "VCSN.lat"        "VCSN.lon"       
 [5] "ETmp"            "MSLP"            "PET"             "RH"             
 [9] "Rad"             "Rain"            "Rain_bc"         "SoilM"          
[13] "TMax"            "Tmax_N"          "Tmin"            "Tmin_N"         
[17] "VP"              "Wind"            "height"          "VCSN.Agent"     
[21] "AnnualTempRange"

3.3 Agent locations

save(vcsn.agent.locations, file = "Output/Data/vcsn.agent.locations.RData")
dim(vcsn.agent.locations)
[1] 11491     4
names(vcsn.agent.locations)
[1] "Agent"    "vcsn.lon" "vcsn.lat" "geometry"

Same things, but slightly different column names.

save(vcsn.agent.locations1, file = "Output/Data/vcsn.agent.locations1.RData")
dim(vcsn.agent.locations1)
[1] 11491     4
names(vcsn.agent.locations1)
[1] "VCSN.Agent" "vcsn.lon"   "vcsn.lat"   "geometry"  

3.4 Agent climate mean value data

Mean value of climate data at an agent location. Taking the mean over all data from 2007 to 2022. Also includes height at the agent location.

save(vcsn.mean, file = "Output/Data/vcsn.mean.RData")
dim(vcsn.mean)
[1] 11491    14
names(vcsn.mean)
 [1] "Agent"           "lat"             "lon"             "MSLP"           
 [5] "PET"             "RH"              "SoilM"           "ETmp"           
 [9] "Rad"             "VP"              "Wind"            "Rain_bc"        
[13] "AnnualTempRange" "height"         

A slightly different version:

  1. prefix of VCSN for “Agent”,“lat”, “long”,
  2. includes “island” column (South Island/North Island).
save(national.climate.summary, file = "Output/Data/national.climate.summary.RData")
dim(national.climate.summary)
[1] 11491    15
names(national.climate.summary)
 [1] "VCSN.Agent"      "VCSN.lat"        "VCSN.lon"        "MSLP"           
 [5] "PET"             "RH"              "SoilM"           "ETmp"           
 [9] "Rad"             "VP"              "Wind"            "Rain_bc"        
[13] "AnnualTempRange" "height"          "island"